The dataset considered is the Steam Video Games Dataset. This dataset is a list of user behaviors, with columns: user-id, game-title, behavior-name, value. The behaviors included are ‘purchase’ and ‘play’. The value indicates the degree to which the behavior was performed - in the case of ‘purchase’ the value is always 1, and in the case of ‘play’ the value represents the number of hours the user has played the game.
# Load the Steam behaviour log into a tibble.
# The file is read with header = FALSE, so column names are supplied here.
# They are written with dots because read.csv() (check.names = TRUE by default)
# converts hyphenated names such as "user-id" to "user.id" anyway; spelling
# them out keeps this call consistent with the column names used downstream.
# The fifth column is read but its meaning is not documented, hence "unknown".
raw_data <- as_tibble(
  read.csv(
    "steam-200k.csv",
    header = FALSE,
    col.names = c("user.id", "game.title", "behavior.name", "value", "unknown")
  )
)
# Print the first rows as a quick sanity check of the import.
head(raw_data)
# Per-game count of "play" records, ordered from most to least frequent.
#   n    - number of "play" rows for the game
#   rnum - 1-based position of the game in that descending ordering
# count(..., sort = TRUE) groups by game.title, tallies the rows, drops the
# grouping and sorts by descending n in a single step.
line_data <- raw_data %>%
  filter(behavior.name == "play") %>%
  count(game.title, sort = TRUE) %>%
  mutate(rnum = row_number())
# Interactive line chart of play-record counts (y) against popularity rank (x),
# with a range slider under the x axis for zooming into a span of ranks.
fig <- plot_ly(line_data, x = ~rnum) %>%
  add_lines(y = ~n) %>%
  layout(
    title = "Most played Games",
    xaxis = list(
      title = "Games by Popularity",
      # TODO: add rangeselector buttons for quick zoom presets.
      # `visible` is the documented switch for the slider; the previous
      # `type = "int"` entry is not a rangeslider attribute.
      rangeslider = list(visible = TRUE)
    ),
    yaxis = list(title = "Num. Of Players")
  )
# Print the widget so it renders at top level.
fig